library(readxl)
library(xlsx)
library(sjPlot)
library(ggplot2)
library(lme4)
## Loading required package: Matrix
library(stringr)
library(ggExtra)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# linguistic data
# Resolve the directory used to locate the input workbook.
# Inside RStudio this is the directory of the file open in the source editor;
# otherwise it falls back to the current working directory.
# requireNamespace() is checked first so that a plain `Rscript` run on a
# machine without the rstudioapi package reaches the fallback instead of
# failing with "there is no package called 'rstudioapi'".
if (requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()) {
  currdir <- dirname(rstudioapi::getSourceEditorContext()$path)
} else {
  currdir <- getwd()
}

# The workbook is looked up two directory levels above `currdir`.
file_path <- file.path(dirname(dirname(currdir)), 'data/extracted_data_3.xlsx')
data <- read_excel(file_path)

# Handling data
# Agent is "H" when conv == 1 and "R" otherwise.
data$Agent <- ifelse(data$conv == 1, "H", "R")
# Drop every row belonging to locutors 1, 4, 19 and 23.
excluded_locutors <- c(1, 4, 19, 23)
data <- data[!(data$locutor %in% excluded_locutors), ]

# Adding / renaming columns
data$Trial <- data$conv_id_unif
# Trial2 is a text label: "t" followed by the trial id left-padded with
# zeros to width 2 (e.g. 3 -> "t03"), used as a term in the ANOVAs below.
data$Trial2 <- paste0('t', str_pad(data$Trial, 2, pad = "0"))
# extra columns will add themselves automatically - just creating structures
# Metrics analysed below. Each metric contributes three rows to df_overall:
# <metric>_overall, <metric>_part and <metric>_conv (in that order).
metrics <- c(
  "lexical_richness", "linguistic_complexity", "content_complexity",
  "nb_ipu", "ratio_silence_lgth", "sum_ipu_lgth",
  "qt_discourse", "qt_feedback", "qt_filled_pause",
  "ratio_discourse", "nratio_feedback", "ratio_filled_pause",
  "mean_ipu_lgth", "speech_rate_min4"
)
summary_rows <- paste(
  rep(metrics, each = 3),
  c("overall", "part", "conv"),
  sep = "_"
)
df_overall <- data.frame(
  mean = numeric(length(summary_rows)),
  std = numeric(length(summary_rows)),
  row.names = summary_rows,
  stringsAsFactors = FALSE
)

#' Pull the F value and p-value of each model term out of an ANOVA table.
#'
#' @param tab The first element of `summary(aov(...))` for a model with the
#'   terms Agent, Trial2 and Agent:Trial2.
#' @return A named numeric vector, in the order Agent_z, Agent_p, Trial2_z,
#'   Trial2_p, Agent:Trial2_z, Agent:Trial2_p. The "_z" entries hold the
#'   "F value" column (the suffix is kept so the exported column names do not
#'   change); the "_p" entries hold "Pr(>F)". A term that is absent from the
#'   table yields NA.
anova_columns <- function(tab) {
  terms <- c("Agent", "Trial2", "Agent:Trial2")
  # The row labels of the table may carry trailing padding, so compare the
  # trimmed labels exactly rather than relying on partial row-name matching.
  idx <- match(terms, trimws(rownames(tab)))
  stats <- rbind(tab[idx, "F value"], tab[idx, "Pr(>F)"])
  # as.vector() walks the 2 x 3 matrix column by column: F, p, F, p, F, p.
  setNames(
    as.vector(stats),
    paste0(rep(terms, each = 2), c("_z", "_p"))
  )
}

#' Write one row of the summary table.
#'
#' @param tbl The summary data frame (df_overall).
#' @param row Name of the row to fill.
#' @param values Numeric vector whose mean and standard deviation are stored
#'   in the "mean" and "std" columns.
#' @param stats Named numeric vector as returned by `anova_columns()`; each
#'   element is stored in the column of the same name, which is created on
#'   first use (other rows then hold NA in it until they are filled).
#' @return `tbl` with the row updated.
store_summary <- function(tbl, row, values, stats) {
  tbl[row, "mean"] <- mean(values)
  tbl[row, "std"] <- sd(values)
  for (col in names(stats)) {
    tbl[row, col] <- stats[[col]]
  }
  tbl
}

key_cols <- c("locutor", "Trial2", "Agent")

# One pass per metric: overall / conversant / participant statistics, then a
# conversant-vs-participant scatter plot with marginal distributions.
# NOTE: in the recorded run of this script, the plots for qt_filled_pause and
# ratio_filled_pause emitted
#   "Computation failed in `stat_density2d()`: bandwidths must be strictly
#    positive"
# so the 2-D density layer is missing from those two figures.
for (metric in metrics) {
  # --- all rows together -------------------------------------------------
  s <- summary(
    aov(as.formula(paste(metric, "~ Agent * Trial2")), data = data)
  )[[1]]
  df_overall <- store_summary(
    df_overall, paste0(metric, "_overall"), data[[metric]], anova_columns(s)
  )

  # --- split by tier -----------------------------------------------------
  # which() drops rows whose tier is NA, as subset() would.
  temp1 <- data[which(data$tier == "conversant"), c(key_cols, metric)]
  temp2 <- data[which(data$tier == "participant"), c(key_cols, metric)]
  conv_values <- temp1[[metric]]
  part_values <- temp2[[metric]]

  # creating merged data - ling
  # Pair each conversant value with the participant value that shares the
  # same locutor, trial and agent.
  colnames(temp1) <- c(key_cols, "data_conv")
  colnames(temp2) <- c(key_cols, "data_part")
  merres <- merge(temp1, temp2, by = key_cols)

  # Means / standard deviations come from the per-tier subsets; the ANOVAs
  # are fitted on the paired (merged) rows.
  s1 <- summary(aov(data_conv ~ Agent * Trial2, data = merres))[[1]]
  s2 <- summary(aov(data_part ~ Agent * Trial2, data = merres))[[1]]
  df_overall <- store_summary(
    df_overall, paste0(metric, "_conv"), conv_values, anova_columns(s1)
  )
  df_overall <- store_summary(
    df_overall, paste0(metric, "_part"), part_values, anova_columns(s2)
  )

  # --- plot --------------------------------------------------------------
  g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
    geom_point(alpha = 0.7) +
    geom_density_2d(alpha = 0.5) +
    geom_smooth(method = "lm") +
    theme(legend.position = "bottom") +
    xlim(0, max(merres$data_conv)) +
    ylim(0, max(merres$data_part)) +
    labs(
      x = paste0("VI: ", metric, " conv"),
      y = paste0("VD: ", metric, " part"),
      color = "Agent"
    )
  # Results are not auto-printed inside a loop, so draw the figure explicitly.
  print(
    ggMarginal(
      g,
      type = "densigram", margins = "both",
      groupColour = TRUE, fill = "white"
    )
  )
}

# Saver ----

# Choose the output location for summary.xlsx: next to the file open in the
# RStudio source editor when running inside RStudio, otherwise in the current
# working directory. requireNamespace() is checked first so the fallback is
# reached (instead of an error) when the rstudioapi package is not installed.
if (requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()) {
  out_dir <- dirname(rstudioapi::getSourceEditorContext()$path)
} else {
  out_dir <- getwd()
}
file_path <- file.path(out_dir, 'summary.xlsx')

# Write the first data set in a new workbook
# append = FALSE starts a fresh workbook rather than adding a sheet to an
# existing one.
write.xlsx(
  df_overall,
  file = file_path,
  sheetName = 'summary',
  append = FALSE
)
# Write others sheets